### AUTOGENERATED from core_named_ops.py
### To regenerate, run: bin/update_core_linalg_named_ops.sh
--- !LinalgOpConfig
# copy: O = cast(U, I).
# `cast` is a type_fn attribute whose default function is cast_signed.
metadata: !LinalgOpMetadata
  name: copy
  cpp_class_name: CopyOp
  doc: |-
    Copies the tensor elementwise.

    Numeric casting is performed on the input operand, promoting it to the same
    data type as the accumulator/output.
structured_op: !LinalgStructuredOpConfig
  # Operands in declaration order: input I, output O, then the `cast` attribute.
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<() -> ()>
  - !LinalgOperandDefConfig
    name: cast
    kind: type_fn_attr
    default_fn: cast_signed
  # One map per tensor operand, in args order (I, O); both are the empty map.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<() -> ()>
    - affine_map<() -> ()>
  # No loop dimensions are declared for this op.
  iterator_types: []
  # O = cast(U, I), with the cast function taken from the `cast` attribute.
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: type
        attr_name: cast
        type_var: U
        operands:
        - !ScalarExpression
          scalar_arg: I
--- !LinalgOpConfig
# elemwise_unary: O = fun(cast(U, I)).
# `fun` is a unary_fn attribute (default: exp); `cast` is a type_fn attribute
# (default: cast_signed).
metadata: !LinalgOpMetadata
  name: elemwise_unary
  cpp_class_name: ElemwiseUnaryOp
  doc: |-
    Applies the unary function fun elementwise.

    Numeric casting is performed on the input operand, promoting it to the same
    data type as the accumulator/output.
structured_op: !LinalgStructuredOpConfig
  # Operands in declaration order: input I, output O, then the two attributes.
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<() -> ()>
  - !LinalgOperandDefConfig
    name: fun
    kind: unary_fn_attr
    default_fn: exp
  - !LinalgOperandDefConfig
    name: cast
    kind: type_fn_attr
    default_fn: cast_signed
  # One map per tensor operand, in args order (I, O); both are the empty map.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<() -> ()>
    - affine_map<() -> ()>
  # No loop dimensions are declared for this op.
  iterator_types: []
  # O = fun(cast(U, I)): the input is cast first, then `fun` is applied.
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: unary
        attr_name: fun
        operands:
        - !ScalarExpression
          scalar_fn:
            kind: type
            attr_name: cast
            type_var: U
            operands:
            - !ScalarExpression
              scalar_arg: I
--- !LinalgOpConfig
# elemwise_binary: O = fun(cast(U, lhs), cast(U, rhs)).
# `fun` is a binary_fn attribute (default: add); `cast` is a type_fn attribute
# (default: cast_signed). Both inputs are cast, not just one.
metadata: !LinalgOpMetadata
  name: elemwise_binary
  cpp_class_name: ElemwiseBinaryOp
  doc: |-
    Applies the binary function fun elementwise.

    Numeric casting is performed on the input operand, promoting it to the same
    data type as the accumulator/output.
structured_op: !LinalgStructuredOpConfig
  # Operands in declaration order: inputs lhs and rhs, output O, then the two
  # attributes.
  args:
  - !LinalgOperandDefConfig
    name: lhs
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  - !LinalgOperandDefConfig
    name: rhs
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<() -> ()>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<() -> ()>
  - !LinalgOperandDefConfig
    name: fun
    kind: binary_fn_attr
    default_fn: add
  - !LinalgOperandDefConfig
    name: cast
    kind: type_fn_attr
    default_fn: cast_signed
  # One map per tensor operand, in args order (lhs, rhs, O); all are the empty
  # map.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<() -> ()>
    - affine_map<() -> ()>
    - affine_map<() -> ()>
  # No loop dimensions are declared for this op.
  iterator_types: []
  # O = fun(cast(U, lhs), cast(U, rhs))
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        attr_name: fun
        operands:
        - !ScalarExpression
          scalar_fn:
            kind: type
            attr_name: cast
            type_var: U
            operands:
            - !ScalarExpression
              scalar_arg: lhs
        - !ScalarExpression
          scalar_fn:
            kind: type
            attr_name: cast
            type_var: U
            operands:
            - !ScalarExpression
              scalar_arg: rhs
--- !LinalgOpConfig
# matmul: C[d0, d1] += cast(U, A[d0, d2]) * cast(U, B[d2, d1]).
# `cast` is a type_fn attribute whose default function is cast_signed.
metadata: !LinalgOpMetadata
  name: matmul
  cpp_class_name: MatmulOp
  doc: |-
    Performs a matrix multiplication of two 2D inputs.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
  implements:
  - LinalgContractionOpInterface
structured_op: !LinalgStructuredOpConfig
  # Shapes in terms of the symbols: A is (s0, s1), B is (s1, s2), C is (s0, s2).
  args:
  - !LinalgOperandDefConfig
    name: A
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2] -> (s0, s1)>
  - !LinalgOperandDefConfig
    name: B
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2] -> (s1, s2)>
  - !LinalgOperandDefConfig
    name: C
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2] -> (s0, s2)>
  - !LinalgOperandDefConfig
    name: cast
    kind: type_fn_attr
    default_fn: cast_signed
  # One map per tensor operand, in args order (A, B, C).
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d2)>
    - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d2, d1)>
    - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d1)>
  # Listed in loop-dimension order (d0, d1, d2): d2 indexes both inputs but not
  # the output, and is the reduction.
  iterator_types:
  - parallel
  - parallel
  - reduction
  # C = C + cast(U, A) * cast(U, B), with the cast function taken from the
  # `cast` attribute.
  assignments:
  - !ScalarAssign
    arg: C
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: C
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                attr_name: cast
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: A
            - !ScalarExpression
              scalar_fn:
                kind: type
                attr_name: cast
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: B
--- !LinalgOpConfig
# matmul_unsigned:
#   C[d0, d1] += cast_unsigned(U, A[d0, d2]) * cast_unsigned(U, B[d2, d1]).
# The cast function is fixed to cast_unsigned; no `cast` attribute is declared.
metadata: !LinalgOpMetadata
  name: matmul_unsigned
  cpp_class_name: MatmulUnsignedOp
  doc: |-
    Performs an unsigned matrix multiplication of two 2D inputs.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
  implements:
  - LinalgContractionOpInterface
structured_op: !LinalgStructuredOpConfig
  # Shapes in terms of the symbols: A is (s0, s1), B is (s1, s2), C is (s0, s2).
  args:
  - !LinalgOperandDefConfig
    name: A
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2] -> (s0, s1)>
  - !LinalgOperandDefConfig
    name: B
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2] -> (s1, s2)>
  - !LinalgOperandDefConfig
    name: C
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2] -> (s0, s2)>
  # One map per operand, in args order (A, B, C).
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d2)>
    - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d2, d1)>
    - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d1)>
  # Listed in loop-dimension order (d0, d1, d2): d2 is the reduction.
  iterator_types:
  - parallel
  - parallel
  - reduction
  # C = C + cast_unsigned(U, A) * cast_unsigned(U, B)
  assignments:
  - !ScalarAssign
    arg: C
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: C
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_unsigned
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: A
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_unsigned
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: B
--- !LinalgOpConfig
# quantized_matmul:
#   C[d0, d1] += (cast_signed(U, A[d0, d2]) - cast_signed(U, AZp))
#              * (cast_signed(U, B[d2, d1]) - cast_signed(U, BZp)).
# AZp and BZp are I32 scalar operands (the zero points named in the doc).
metadata: !LinalgOpMetadata
  name: quantized_matmul
  cpp_class_name: QuantizedMatmulOp
  doc: |-
    Performs a matrix multiplication of two 2D inputs.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output. The quantized variant
    includes zero-point adjustments for the left and right operands of the
    matmul.
structured_op: !LinalgStructuredOpConfig
  # Shapes in terms of the symbols: A is (s0, s1), B is (s1, s2), C is (s0, s2).
  # The two scalar operands carry no shape_map.
  args:
  - !LinalgOperandDefConfig
    name: A
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2] -> (s0, s1)>
  - !LinalgOperandDefConfig
    name: B
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2] -> (s1, s2)>
  - !LinalgOperandDefConfig
    name: AZp
    kind: scalar
    type_var: I32
  - !LinalgOperandDefConfig
    name: BZp
    kind: scalar
    type_var: I32
  - !LinalgOperandDefConfig
    name: C
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2] -> (s0, s2)>
  # One map per operand, in args order (A, B, AZp, BZp, C); the scalars use a
  # map with an empty result list.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d2)>
    - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d2, d1)>
    - affine_map<(d0, d1, d2)[s0, s1, s2] -> ()>
    - affine_map<(d0, d1, d2)[s0, s1, s2] -> ()>
    - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d1)>
  # Listed in loop-dimension order (d0, d1, d2): d2 is the reduction.
  iterator_types:
  - parallel
  - parallel
  - reduction
  # C = C + (cast(A) - cast(AZp)) * (cast(B) - cast(BZp)); every cast is
  # cast_signed to U, applied before the subtraction.
  assignments:
  - !ScalarAssign
    arg: C
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: C
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: binary
                fn_name: sub
                operands:
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: A
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: AZp
            - !ScalarExpression
              scalar_fn:
                kind: binary
                fn_name: sub
                operands:
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: B
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: BZp
--- !LinalgOpConfig
# mmt4d:
#   accum[d0, d1, d3, d4] += cast_signed(AccumType, lhs[d0, d2, d3, d5])
#                          * cast_signed(AccumType, rhs[d1, d2, d4, d5]).
# Both inputs are indexed with the reduction dimensions (d2, d5) in the same
# positions, i.e. the rhs is read in transposed layout.
metadata: !LinalgOpMetadata
  name: mmt4d
  cpp_class_name: Mmt4DOp
  doc: |-
    Performs a matrix-matrix-transpose multiplication of two 4D inputs.

    Differences from linalg.matmul:
    * The right hand side is transposed, whence the 't' in 'mmt'.
    * The input and output tensors have a 4D shape instead of a 2D shape. They
      are interpreted as 2D matrices with one level of 2D tile subdivision,
      whence the 2+2=4 dimensions. The inner tile dimensions are identified with
      '0' suffixes below, for instance the LHS matrix shape (M, K, M0, K0) reads
      as: MxK tiles, each of shape M0xK0.
  implements:
  - LinalgContractionOpInterface
structured_op: !LinalgStructuredOpConfig
  # Shapes in terms of the symbols: lhs is (s0, s1, s2, s3), rhs is
  # (s4, s1, s5, s3), accum is (s0, s4, s2, s5).
  args:
  - !LinalgOperandDefConfig
    name: lhs
    kind: input_tensor
    type_var: LhsType
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1, s2, s3)>
  - !LinalgOperandDefConfig
    name: rhs
    kind: input_tensor
    type_var: RhsType
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s4, s1, s5, s3)>
  - !LinalgOperandDefConfig
    name: accum
    kind: output_tensor
    type_var: AccumType
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s4, s2, s5)>
  # One map per operand, in args order (lhs, rhs, accum). Each map is a plain
  # scalar wrapped onto a continuation line.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5] -> (d0, d2, d3,
      d5)>
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5] -> (d1, d2, d4,
      d5)>
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5] -> (d0, d1, d3,
      d4)>
  # Listed in loop-dimension order (d0..d5): d2 and d5 are the reductions.
  iterator_types:
  - parallel
  - parallel
  - reduction
  - parallel
  - parallel
  - reduction
  # accum = accum + cast_signed(AccumType, lhs) * cast_signed(AccumType, rhs)
  assignments:
  - !ScalarAssign
    arg: accum
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: accum
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: AccumType
                operands:
                - !ScalarExpression
                  scalar_arg: lhs
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: AccumType
                operands:
                - !ScalarExpression
                  scalar_arg: rhs
--- !LinalgOpConfig
# matmul_transpose_b: C[d0, d1] += cast(U, A[d0, d2]) * cast(U, B[d1, d2]).
# B is indexed as (d1, d2), i.e. with the reduction dimension last.
# `cast` is a type_fn attribute whose default function is cast_signed.
metadata: !LinalgOpMetadata
  name: matmul_transpose_b
  cpp_class_name: MatmulTransposeBOp
  doc: |-
    Performs a matrix multiplication of two 2D inputs with rhs operand transposed.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
  implements:
  - LinalgContractionOpInterface
structured_op: !LinalgStructuredOpConfig
  # Shapes in terms of the symbols: A is (s0, s1), B is (s2, s1), C is (s0, s2).
  args:
  - !LinalgOperandDefConfig
    name: A
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2] -> (s0, s1)>
  - !LinalgOperandDefConfig
    name: B
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2] -> (s2, s1)>
  - !LinalgOperandDefConfig
    name: C
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2] -> (s0, s2)>
  - !LinalgOperandDefConfig
    name: cast
    kind: type_fn_attr
    default_fn: cast_signed
  # One map per tensor operand, in args order (A, B, C).
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d2)>
    - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d1, d2)>
    - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d1)>
  # Listed in loop-dimension order (d0, d1, d2): d2 is the reduction.
  iterator_types:
  - parallel
  - parallel
  - reduction
  # C = C + cast(U, A) * cast(U, B), with the cast function taken from the
  # `cast` attribute.
  assignments:
  - !ScalarAssign
    arg: C
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: C
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                attr_name: cast
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: A
            - !ScalarExpression
              scalar_fn:
                kind: type
                attr_name: cast
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: B
--- !LinalgOpConfig
# batch_matmul:
#   C[d0, d1, d2] += cast_signed(U, A[d0, d1, d3]) * cast_signed(U, B[d0, d3, d2]).
# d0 indexes the leading dimension of all three operands.
metadata: !LinalgOpMetadata
  name: batch_matmul
  cpp_class_name: BatchMatmulOp
  doc: |-
    Performs a batched matrix multiplication of two 3D inputs.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
  implements:
  - LinalgContractionOpInterface
structured_op: !LinalgStructuredOpConfig
  # Shapes in terms of the symbols: A is (s0, s1, s2), B is (s0, s2, s3),
  # C is (s0, s1, s3).
  args:
  - !LinalgOperandDefConfig
    name: A
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s1, s2)>
  - !LinalgOperandDefConfig
    name: B
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s2, s3)>
  - !LinalgOperandDefConfig
    name: C
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s1, s3)>
  # One map per operand, in args order (A, B, C).
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d1, d3)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d3, d2)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d1, d2)>
  # Listed in loop-dimension order (d0, d1, d2, d3): d3 is the reduction.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - reduction
  # C = C + cast_signed(U, A) * cast_signed(U, B)
  assignments:
  - !ScalarAssign
    arg: C
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: C
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: A
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: B
--- !LinalgOpConfig
# quantized_batch_matmul:
#   C[d0, d1, d2] += (cast_signed(U, A[d0, d1, d3]) - cast_signed(U, AZp))
#                  * (cast_signed(U, B[d0, d3, d2]) - cast_signed(U, BZp)).
# AZp and BZp are I32 scalar operands (the zero points named in the doc).
metadata: !LinalgOpMetadata
  name: quantized_batch_matmul
  cpp_class_name: QuantizedBatchMatmulOp
  doc: |-
    Performs a batched matrix multiplication of two 3D inputs.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output. The quantized variant
    includes zero-point adjustments for the left and right operands of the
    matmul.
structured_op: !LinalgStructuredOpConfig
  # Shapes in terms of the symbols: A is (s0, s1, s2), B is (s0, s2, s3),
  # C is (s0, s1, s3). The two scalar operands carry no shape_map.
  args:
  - !LinalgOperandDefConfig
    name: A
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s1, s2)>
  - !LinalgOperandDefConfig
    name: B
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s2, s3)>
  - !LinalgOperandDefConfig
    name: AZp
    kind: scalar
    type_var: I32
  - !LinalgOperandDefConfig
    name: BZp
    kind: scalar
    type_var: I32
  - !LinalgOperandDefConfig
    name: C
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s1, s3)>
  # One map per operand, in args order (A, B, AZp, BZp, C); the scalars use a
  # map with an empty result list.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d1, d3)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d3, d2)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> ()>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> ()>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d1, d2)>
  # Listed in loop-dimension order (d0, d1, d2, d3): d3 is the reduction.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - reduction
  # C = C + (cast(A) - cast(AZp)) * (cast(B) - cast(BZp)); every cast is
  # cast_signed to U, applied before the subtraction.
  assignments:
  - !ScalarAssign
    arg: C
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: C
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: binary
                fn_name: sub
                operands:
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: A
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: AZp
            - !ScalarExpression
              scalar_fn:
                kind: binary
                fn_name: sub
                operands:
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: B
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: BZp
--- !LinalgOpConfig
# batch_matmul_transpose_b:
#   C[d0, d1, d2] += cast_signed(U, A[d0, d1, d3]) * cast_signed(U, B[d0, d2, d3]).
# B is indexed as (d0, d2, d3), i.e. with the reduction dimension last.
metadata: !LinalgOpMetadata
  name: batch_matmul_transpose_b
  cpp_class_name: BatchMatmulTransposeBOp
  doc: |-
    Performs a batched matrix multiplication of two 3D inputs where rhs operand has its non-batch
    dimensions transposed.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
  implements:
  - LinalgContractionOpInterface
structured_op: !LinalgStructuredOpConfig
  # Shapes in terms of the symbols: A is (s0, s1, s2), B is (s0, s3, s2),
  # C is (s0, s1, s3).
  args:
  - !LinalgOperandDefConfig
    name: A
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s1, s2)>
  - !LinalgOperandDefConfig
    name: B
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s3, s2)>
  - !LinalgOperandDefConfig
    name: C
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s1, s3)>
  # One map per operand, in args order (A, B, C).
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d1, d3)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d2, d3)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d1, d2)>
  # Listed in loop-dimension order (d0, d1, d2, d3): d3 is the reduction.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - reduction
  # C = C + cast_signed(U, A) * cast_signed(U, B)
  assignments:
  - !ScalarAssign
    arg: C
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: C
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: A
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: B
--- !LinalgOpConfig
# batch_reduce_matmul:
#   C[d1, d2] += cast_signed(U, A[d0, d1, d3]) * cast_signed(U, B[d0, d3, d2]).
# The output is 2D: d0 (the leading dimension of both inputs) does not appear
# in the output map and is reduced over, together with d3.
# NOTE(review): this file is generated from core_named_ops.py; confirm the
# generator source expresses the same assignment so that regeneration keeps it.
metadata: !LinalgOpMetadata
  name: batch_reduce_matmul
  cpp_class_name: BatchReduceMatmulOp
  doc: |-
    Performs a batch-reduce matrix multiplication of two 3D inputs.
    The partial multiplication results are reduced into a 2D output.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
  implements:
  - LinalgContractionOpInterface
structured_op: !LinalgStructuredOpConfig
  # Shapes in terms of the symbols: A is (s0, s1, s2), B is (s0, s2, s3),
  # C is (s1, s3).
  args:
  - !LinalgOperandDefConfig
    name: A
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s1, s2)>
  - !LinalgOperandDefConfig
    name: B
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s2, s3)>
  - !LinalgOperandDefConfig
    name: C
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3] -> (s1, s3)>
  # One map per operand, in args order (A, B, C).
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d1, d3)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d3, d2)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d1, d2)>
  # Listed in loop-dimension order (d0, d1, d2, d3): d0 and d3 are reductions.
  iterator_types:
  - reduction
  - parallel
  - parallel
  - reduction
  # C = C + cast_signed(U, A) * cast_signed(U, B).
  # Each multiply operand is cast to U individually, before the multiply, as
  # the doc string states; A (T1) and B (T2) may differ from U, so the product
  # must not be formed on uncast operands.
  assignments:
  - !ScalarAssign
    arg: C
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: C
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: A
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: B
--- !LinalgOpConfig
# matvec: x[d0] += cast_signed(U, A[d0, d1]) * cast_signed(U, y[d1]).
# Note the naming: y is the input vector and x is the output vector.
metadata: !LinalgOpMetadata
  name: matvec
  cpp_class_name: MatvecOp
  doc: |-
    Performs a matrix-vector multiplication.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
  implements:
  - LinalgContractionOpInterface
structured_op: !LinalgStructuredOpConfig
  # Shapes in terms of the symbols: A is (s0, s1), y is (s1), x is (s0).
  args:
  - !LinalgOperandDefConfig
    name: A
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1] -> (s0, s1)>
  - !LinalgOperandDefConfig
    name: y
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1] -> (s1)>
  - !LinalgOperandDefConfig
    name: x
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1] -> (s0)>
  # One map per operand, in args order (A, y, x).
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1)[s0, s1] -> (d0, d1)>
    - affine_map<(d0, d1)[s0, s1] -> (d1)>
    - affine_map<(d0, d1)[s0, s1] -> (d0)>
  # Listed in loop-dimension order (d0, d1): d1 is the reduction.
  iterator_types:
  - parallel
  - reduction
  # x = x + cast_signed(U, A) * cast_signed(U, y)
  assignments:
  - !ScalarAssign
    arg: x
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: x
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: A
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: y
--- !LinalgOpConfig
# vecmat: x[d0] += cast_signed(U, y[d1]) * cast_signed(U, A[d1, d0]).
# Note the naming: y is the input vector and x is the output vector.
metadata: !LinalgOpMetadata
  name: vecmat
  cpp_class_name: VecmatOp
  doc: |-
    Performs a vector-matrix multiplication.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
  implements:
  - LinalgContractionOpInterface
structured_op: !LinalgStructuredOpConfig
  # Shapes in terms of the symbols: y is (s0), A is (s0, s1), x is (s1).
  args:
  - !LinalgOperandDefConfig
    name: y
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1] -> (s0)>
  - !LinalgOperandDefConfig
    name: A
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1] -> (s0, s1)>
  - !LinalgOperandDefConfig
    name: x
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1] -> (s1)>
  # One map per operand, in args order (y, A, x). Here the parallel output
  # dimension is d0 and the reduction is d1, so A is indexed as (d1, d0).
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1)[s0, s1] -> (d1)>
    - affine_map<(d0, d1)[s0, s1] -> (d1, d0)>
    - affine_map<(d0, d1)[s0, s1] -> (d0)>
  # Listed in loop-dimension order (d0, d1): d1 is the reduction.
  iterator_types:
  - parallel
  - reduction
  # x = x + cast_signed(U, y) * cast_signed(U, A)
  assignments:
  - !ScalarAssign
    arg: x
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: x
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: y
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: A
--- !LinalgOpConfig
# batch_matvec:
#   C[d0, d1] += cast_signed(U, A[d0, d1, d2]) * cast_signed(U, B[d0, d2]).
# d0 indexes the leading dimension of all three operands.
metadata: !LinalgOpMetadata
  name: batch_matvec
  cpp_class_name: BatchMatvecOp
  doc: |-
    Performs a batched matrix-vector multiplication.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
  implements:
  - LinalgContractionOpInterface
structured_op: !LinalgStructuredOpConfig
  # Shapes in terms of the symbols: A is (s0, s1, s2), B is (s0, s2),
  # C is (s0, s1).
  args:
  - !LinalgOperandDefConfig
    name: A
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2] -> (s0, s1, s2)>
  - !LinalgOperandDefConfig
    name: B
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2] -> (s0, s2)>
  - !LinalgOperandDefConfig
    name: C
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2] -> (s0, s1)>
  # One map per operand, in args order (A, B, C).
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d1, d2)>
    - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d2)>
    - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d1)>
  # Listed in loop-dimension order (d0, d1, d2): d2 is the reduction.
  iterator_types:
  - parallel
  - parallel
  - reduction
  # C = C + cast_signed(U, A) * cast_signed(U, B)
  assignments:
  - !ScalarAssign
    arg: C
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: C
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: A
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: B
--- !LinalgOpConfig
# dot: C[] += cast_signed(U, A[d0]) * cast_signed(U, B[d0]).
# The output map has an empty result list, so C is indexed with no dimensions.
metadata: !LinalgOpMetadata
  name: dot
  cpp_class_name: DotOp
  doc: |-
    Performs a dot product of two vectors to a scalar result.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
  implements:
  - LinalgContractionOpInterface
structured_op: !LinalgStructuredOpConfig
  # Shapes in terms of the symbol: A is (s0), B is (s0), C has an empty shape.
  args:
  - !LinalgOperandDefConfig
    name: A
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0] -> (s0)>
  - !LinalgOperandDefConfig
    name: B
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0] -> (s0)>
  - !LinalgOperandDefConfig
    name: C
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0] -> ()>
  # One map per operand, in args order (A, B, C).
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0)[s0] -> (d0)>
    - affine_map<(d0)[s0] -> (d0)>
    - affine_map<(d0)[s0] -> ()>
  # The single loop dimension d0 is the reduction.
  iterator_types:
  - reduction
  # C = C + cast_signed(U, A) * cast_signed(U, B)
  assignments:
  - !ScalarAssign
    arg: C
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: C
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: A
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: B
--- !LinalgOpConfig
# conv_1d: rank-1 convolution over I, K and O.
# Shapes: I has extent s0 + s1, K has extent s1, O has extent s0.
# d0 (parallel) indexes O, d1 (reduction) indexes K, and I is read at d0 + d1.
# Per the assignment below, each step computes
#   O = O + cast_signed(U, I) * cast_signed(U, K).
metadata: !LinalgOpMetadata
  name: conv_1d
  cpp_class_name: Conv1DOp
  doc: |-
    Performs 1-D convolution with no channels.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1] -> (s0 + s1)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1] -> (s1)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1] -> (s0)>
  indexing_maps: !LinalgIndexingMapsConfig
    # Maps are listed in operand order: I, K, O.
    static_indexing_maps:
    - affine_map<(d0, d1)[s0, s1] -> (d0 + d1)>
    - affine_map<(d0, d1)[s0, s1] -> (d1)>
    - affine_map<(d0, d1)[s0, s1] -> (d0)>
  # One entry per iteration dimension: d0, d1.
  iterator_types:
  - parallel
  - reduction
  assignments:
  # O = add(O, mul(cast_signed(U, I), cast_signed(U, K)))
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: I
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: K
--- !LinalgOpConfig
# conv_2d: rank-2 convolution over I, K and O.
# Shapes: I is (s0 + s1, s2 + s3), K is (s1, s3), O is (s0, s2).
# d0, d1 (parallel) index O; d2, d3 (reduction) index K; I is read at
# (d0 + d2, d1 + d3).
# Per the assignment below, each step computes
#   O = O + cast_signed(U, I) * cast_signed(U, K).
metadata: !LinalgOpMetadata
  name: conv_2d
  cpp_class_name: Conv2DOp
  doc: |-
    Performs 2-D convolution with no channels.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3] -> (s0 + s1, s2 + s3)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3] -> (s1, s3)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s2)>
  indexing_maps: !LinalgIndexingMapsConfig
    # Maps are listed in operand order: I, K, O.
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0 + d2, d1 + d3)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d2, d3)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d1)>
  # One entry per iteration dimension: d0 .. d3.
  iterator_types:
  - parallel
  - parallel
  - reduction
  - reduction
  assignments:
  # O = add(O, mul(cast_signed(U, I), cast_signed(U, K)))
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: I
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: K
--- !LinalgOpConfig
# conv_3d: rank-3 convolution over I, K and O.
# Shapes: I is (s0 + s1, s2 + s3, s4 + s5), K is (s1, s3, s5),
# O is (s0, s2, s4).
# d0..d2 (parallel) index O; d3..d5 (reduction) index K; I is read at
# (d0 + d3, d1 + d4, d2 + d5).
# Per the assignment below, each step computes
#   O = O + cast_signed(U, I) * cast_signed(U, K).
metadata: !LinalgOpMetadata
  name: conv_3d
  cpp_class_name: Conv3DOp
  doc: |-
    Performs 3-D convolution with no channels.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0 + s1, s2 + s3, s4 + s5)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s1, s3, s5)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s2, s4)>
  indexing_maps: !LinalgIndexingMapsConfig
    # Maps are listed in operand order: I, K, O.
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5] -> (d0 + d3, d1
      + d4, d2 + d5)>
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5] -> (d3, d4, d5)>
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5] -> (d0, d1, d2)>
  # One entry per iteration dimension: d0 .. d5.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  - reduction
  assignments:
  # O = add(O, mul(cast_signed(U, I), cast_signed(U, K)))
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: I
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: K
--- !LinalgOpConfig
# conv_1d_nwc_wcf: strided/dilated 1-D convolution.
# Shapes: I is (s0, s1 * s2 + s3 * s4, s5), K is (s3, s5, s6), O is (s0, s1, s6).
# s2 is the `strides` attribute and s4 the `dilations` attribute (both default
# to 1). d0..d2 (parallel) index O; d3, d4 (reduction) index K together with d2.
# Per the assignment below, each step computes
#   O = O + cast_signed(U, I) * cast_signed(U, K).
# NOTE: the file header says this file is generated from core_named_ops.py;
# the `Layout:` section added to `doc` must be mirrored in that docstring or it
# will be lost on regeneration.
metadata: !LinalgOpMetadata
  name: conv_1d_nwc_wcf
  cpp_class_name: Conv1DNwcWcfOp
  doc: |-
    Performs 1-D convolution.

    Layout:
      * Input: NWC.
      * Kernel: WCF.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s0, s1 * s2 + s3 * s4,
      s5)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s3, s5, s6)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s0, s1, s6)>
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s2)>
    default_indices:
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s4)>
    default_indices:
    - 1
  indexing_maps: !LinalgIndexingMapsConfig
    # Maps are listed in operand order: I, K, O (index attributes have none).
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4)[s0, s1, s2, s3, s4, s5, s6] -> (d0, d1 * s2
      + d3 * s4, d4)>
    - affine_map<(d0, d1, d2, d3, d4)[s0, s1, s2, s3, s4, s5, s6] -> (d3, d4, d2)>
    - affine_map<(d0, d1, d2, d3, d4)[s0, s1, s2, s3, s4, s5, s6] -> (d0, d1, d2)>
  # One entry per iteration dimension: d0 .. d4.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  assignments:
  # O = add(O, mul(cast_signed(U, I), cast_signed(U, K)))
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: I
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: K
--- !LinalgOpConfig
# conv_1d_ncw_fcw: strided/dilated 1-D convolution.
# Shapes: I is (s0, s1, s2 * s3 + s4 * s5), K is (s6, s1, s4), O is (s0, s6, s2).
# s3 is the `strides` attribute and s5 the `dilations` attribute (both default
# to 1). d0..d2 (parallel) index O; d3, d4 (reduction) index K together with d1.
# Per the assignment below, each step computes
#   O = O + cast_signed(U, I) * cast_signed(U, K).
metadata: !LinalgOpMetadata
  name: conv_1d_ncw_fcw
  cpp_class_name: Conv1DNcwFcwOp
  doc: |-
    Performs 1-D convolution.

    Layout:
      * Input: NCW.
      * Kernel: FCW.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s0, s1, s2 * s3 + s4
      * s5)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s6, s1, s4)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s0, s6, s2)>
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s3)>
    default_indices:
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s5)>
    default_indices:
    - 1
  indexing_maps: !LinalgIndexingMapsConfig
    # Maps are listed in operand order: I, K, O (index attributes have none).
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4)[s0, s1, s2, s3, s4, s5, s6] -> (d0, d3, d2 *
      s3 + d4 * s5)>
    - affine_map<(d0, d1, d2, d3, d4)[s0, s1, s2, s3, s4, s5, s6] -> (d1, d3, d4)>
    - affine_map<(d0, d1, d2, d3, d4)[s0, s1, s2, s3, s4, s5, s6] -> (d0, d1, d2)>
  # One entry per iteration dimension: d0 .. d4.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  assignments:
  # O = add(O, mul(cast_signed(U, I), cast_signed(U, K)))
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: I
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: K
--- !LinalgOpConfig
# conv_2d_nhwc_hwcf: strided/dilated 2-D convolution.
# Shapes: I is (s0, s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9),
# K is (s3, s7, s9, s10), O is (s0, s1, s5, s10).
# (s2, s6) are the `strides` attribute and (s4, s8) the `dilations` attribute
# (all default to 1). d0..d3 (parallel) index O; d4..d6 are reductions.
# Per the assignment below, each step computes
#   O = O + cast_signed(U, I) * cast_signed(U, K).
metadata: !LinalgOpMetadata
  name: conv_2d_nhwc_hwcf
  cpp_class_name: Conv2DNhwcHwcfOp
  doc: |-
    Performs 2-D convolution.

    Layout:
      * Input: NHWC.
      * Kernel: HWCF.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s0,
      s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s3,
      s7, s9, s10)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s0,
      s1, s5, s10)>
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] ->
      (s2, s6)>
    default_indices:
    - 1
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] ->
      (s4, s8)>
    default_indices:
    - 1
    - 1
  indexing_maps: !LinalgIndexingMapsConfig
    # Maps are listed in operand order: I, K, O (index attributes have none).
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
      s9, s10] -> (d0, d1 * s2 + d4 * s4, d2 * s6 + d5 * s8, d6)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
      s9, s10] -> (d4, d5, d6, d3)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
      s9, s10] -> (d0, d1, d2, d3)>
  # One entry per iteration dimension: d0 .. d6.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  - reduction
  assignments:
  # O = add(O, mul(cast_signed(U, I), cast_signed(U, K)))
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: I
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: K
--- !LinalgOpConfig
# conv_2d_nhwc_fhwc: strided/dilated 2-D convolution.
# Shapes: I is (s0, s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9),
# K is (s10, s3, s7, s9), O is (s0, s1, s5, s10).
# (s2, s6) are the `strides` attribute and (s4, s8) the `dilations` attribute
# (all default to 1). d0..d3 (parallel) index O; d4..d6 are reductions.
# Per the assignment below, each step computes
#   O = O + cast_signed(U, I) * cast_signed(U, K).
metadata: !LinalgOpMetadata
  name: conv_2d_nhwc_fhwc
  cpp_class_name: Conv2DNhwcFhwcOp
  doc: |-
    Performs 2-D convolution.

    Layout:
      * Input: NHWC.
      * Kernel: FHWC.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s0,
      s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s10,
      s3, s7, s9)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s0,
      s1, s5, s10)>
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] ->
      (s2, s6)>
    default_indices:
    - 1
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] ->
      (s4, s8)>
    default_indices:
    - 1
    - 1
  indexing_maps: !LinalgIndexingMapsConfig
    # Maps are listed in operand order: I, K, O (index attributes have none).
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
      s9, s10] -> (d0, d1 * s2 + d4 * s4, d2 * s6 + d5 * s8, d6)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
      s9, s10] -> (d3, d4, d5, d6)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
      s9, s10] -> (d0, d1, d2, d3)>
  # One entry per iteration dimension: d0 .. d6.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  - reduction
  assignments:
  # O = add(O, mul(cast_signed(U, I), cast_signed(U, K)))
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: I
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: K
--- !LinalgOpConfig
# conv_2d_nhwc_hwcf_q: strided/dilated 2-D convolution with two extra scalar
# operands, IZp and KZp (both I32), that are subtracted from I and K.
# Shapes: I is (s0, s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9),
# K is (s3, s7, s9, s10), O is (s0, s1, s5, s10).
# (s2, s6) are the `strides` attribute and (s4, s8) the `dilations` attribute
# (all default to 1). d0..d3 (parallel) index O; d4..d6 are reductions.
# Per the assignment below, each step computes
#   O = O + (cast_signed(U, I) - cast_signed(U, IZp))
#         * (cast_signed(U, K) - cast_signed(U, KZp)).
metadata: !LinalgOpMetadata
  name: conv_2d_nhwc_hwcf_q
  cpp_class_name: Conv2DNhwcHwcfQOp
  doc: |-
    Performs 2-D convolution with zero point offsets.

    Layout:
      * Input: NHWC.
      * Kernel: HWCF.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output. This includes the zero
    point offsets common to quantized operations.
  implements:
  - LinalgConvolutionOpInterface
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s0,
      s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s3,
      s7, s9, s10)>
  # Scalar operands carry no shape_map; their indexing maps below are empty.
  - !LinalgOperandDefConfig
    name: IZp
    kind: scalar
    type_var: I32
  - !LinalgOperandDefConfig
    name: KZp
    kind: scalar
    type_var: I32
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s0,
      s1, s5, s10)>
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] ->
      (s2, s6)>
    default_indices:
    - 1
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] ->
      (s4, s8)>
    default_indices:
    - 1
    - 1
  indexing_maps: !LinalgIndexingMapsConfig
    # Maps are listed in operand order: I, K, IZp, KZp, O.
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
      s9, s10] -> (d0, d1 * s2 + d4 * s4, d2 * s6 + d5 * s8, d6)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
      s9, s10] -> (d4, d5, d6, d3)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
      s9, s10] -> ()>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
      s9, s10] -> ()>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
      s9, s10] -> (d0, d1, d2, d3)>
  # One entry per iteration dimension: d0 .. d6.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  - reduction
  assignments:
  # O = add(O, mul(sub(cast(I), cast(IZp)), sub(cast(K), cast(KZp)))),
  # where cast is cast_signed to U.
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: binary
                fn_name: sub
                operands:
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: I
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: IZp
            - !ScalarExpression
              scalar_fn:
                kind: binary
                fn_name: sub
                operands:
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: K
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: KZp
--- !LinalgOpConfig
# conv_2d_nchw_fchw: strided/dilated 2-D convolution.
# Shapes: I is (s0, s1, s2 * s3 + s4 * s5, s6 * s7 + s8 * s9),
# K is (s10, s1, s4, s8), O is (s0, s10, s2, s6).
# (s3, s7) are the `strides` attribute and (s5, s9) the `dilations` attribute
# (all default to 1). d0..d3 (parallel) index O; d4..d6 are reductions.
# Per the assignment below, each step computes
#   O = O + cast_signed(U, I) * cast_signed(U, K).
metadata: !LinalgOpMetadata
  name: conv_2d_nchw_fchw
  cpp_class_name: Conv2DNchwFchwOp
  doc: |-
    Performs 2-D convolution.

    Layout:
      * Input: NCHW.
      * Kernel: FCHW.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s0,
      s1, s2 * s3 + s4 * s5, s6 * s7 + s8 * s9)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s10,
      s1, s4, s8)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s0,
      s10, s2, s6)>
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] ->
      (s3, s7)>
    default_indices:
    - 1
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] ->
      (s5, s9)>
    default_indices:
    - 1
    - 1
  indexing_maps: !LinalgIndexingMapsConfig
    # Maps are listed in operand order: I, K, O (index attributes have none).
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
      s9, s10] -> (d0, d4, d2 * s3 + d5 * s5, d3 * s7 + d6 * s9)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
      s9, s10] -> (d1, d4, d5, d6)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
      s9, s10] -> (d0, d1, d2, d3)>
  # One entry per iteration dimension: d0 .. d6.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  - reduction
  assignments:
  # O = add(O, mul(cast_signed(U, I), cast_signed(U, K)))
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: I
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: K
--- !LinalgOpConfig
# conv_2d_ngchw_fgchw: strided/dilated grouped 2-D convolution.
# Shapes: I is (s0, s1, s2, s3 * s4 + s5 * s6, s7 * s8 + s9 * s10),
# K is (s11, s1, s2, s5, s9), O is (s0, s1, s11, s3, s7).
# (s4, s8) are the `strides` attribute and (s6, s10) the `dilations` attribute
# (all default to 1). d0..d4 (parallel) index O; d5..d7 are reductions.
# Per the assignment below, each step computes
#   O = O + cast_signed(U, I) * cast_signed(U, K).
#
# Dimension/symbol pairing (must agree across all three operands):
#   d1 <-> s1  (dim 1 of I, dim 1 of K, dim 1 of O)
#   d2 <-> s11 (dim 0 of K, dim 2 of O)
# FIX: K was previously indexed as (d1, d2, d5, d6, d7) and O was declared as
# (s0, s11, s1, s3, s7). Both paired d1 with s11 and d2 with s1, contradicting
# the I operand (d1 <-> s1) and the documented FGCHW kernel layout.
# NOTE: the file header says this file is generated from core_named_ops.py;
# the same correction must be made there or it will be lost on regeneration.
metadata: !LinalgOpMetadata
  name: conv_2d_ngchw_fgchw
  cpp_class_name: Conv2DNgchwFgchwOp
  doc: |-
    Performs 2-D grouped convolution.

    Layout:
      * Input: NGCHW.
      * Kernel: FGCHW.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] ->
      (s0, s1, s2, s3 * s4 + s5 * s6, s7 * s8 + s9 * s10)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] ->
      (s11, s1, s2, s5, s9)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] ->
      (s0, s1, s11, s3, s7)>
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11]
      -> (s4, s8)>
    default_indices:
    - 1
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11]
      -> (s6, s10)>
    default_indices:
    - 1
    - 1
  indexing_maps: !LinalgIndexingMapsConfig
    # Maps are listed in operand order: I, K, O (index attributes have none).
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
      s8, s9, s10, s11] -> (d0, d1, d5, d3 * s4 + d6 * s6, d4 * s8 + d7 * s10)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
      s8, s9, s10, s11] -> (d2, d1, d5, d6, d7)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
      s8, s9, s10, s11] -> (d0, d1, d2, d3, d4)>
  # One entry per iteration dimension: d0 .. d7.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  - reduction
  assignments:
  # O = add(O, mul(cast_signed(U, I), cast_signed(U, K)))
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: I
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: K
--- !LinalgOpConfig
# conv_3d_ndhwc_dhwcf: strided/dilated 3-D convolution.
# Shapes: I is (s0, s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9 * s10 + s11 * s12,
# s13), K is (s3, s7, s11, s13, s14), O is (s0, s1, s5, s9, s14).
# (s2, s6, s10) are the `strides` attribute and (s4, s8, s12) the `dilations`
# attribute (all default to 1). d0..d4 (parallel) index O; d5..d8 are
# reductions.
# Per the assignment below, each step computes
#   O = O + cast_signed(U, I) * cast_signed(U, K).
# NOTE: the file header says this file is generated from core_named_ops.py;
# the `Layout:` section added to `doc` must be mirrored in that docstring or it
# will be lost on regeneration.
metadata: !LinalgOpMetadata
  name: conv_3d_ndhwc_dhwcf
  cpp_class_name: Conv3DNdhwcDhwcfOp
  doc: |-
    Performs 3-D convolution.

    Layout:
      * Input: NDHWC.
      * Kernel: DHWCF.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
      s13, s14] -> (s0, s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9 * s10 + s11 * s12,
      s13)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
      s13, s14] -> (s3, s7, s11, s13, s14)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
      s13, s14] -> (s0, s1, s5, s9, s14)>
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
      s12, s13, s14] -> (s2, s6, s10)>
    default_indices:
    - 1
    - 1
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
      s12, s13, s14] -> (s4, s8, s12)>
    default_indices:
    - 1
    - 1
    - 1
  indexing_maps: !LinalgIndexingMapsConfig
    # Maps are listed in operand order: I, K, O (index attributes have none).
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6,
      s7, s8, s9, s10, s11, s12, s13, s14] -> (d0, d1 * s2 + d5 * s4, d2 * s6 + d6
      * s8, d3 * s10 + d7 * s12, d8)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6,
      s7, s8, s9, s10, s11, s12, s13, s14] -> (d5, d6, d7, d8, d4)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6,
      s7, s8, s9, s10, s11, s12, s13, s14] -> (d0, d1, d2, d3, d4)>
  # One entry per iteration dimension: d0 .. d8.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  - reduction
  - reduction
  assignments:
  # O = add(O, mul(cast_signed(U, I), cast_signed(U, K)))
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: I
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: K
--- !LinalgOpConfig
# conv_3d_ndhwc_dhwcf_q: strided/dilated 3-D convolution with two extra scalar
# operands, IZp and KZp (both I32), that are subtracted from I and K.
# Shapes: I is (s0, s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9 * s10 + s11 * s12,
# s13), K is (s3, s7, s11, s13, s14), O is (s0, s1, s5, s9, s14).
# (s2, s6, s10) are the `strides` attribute and (s4, s8, s12) the `dilations`
# attribute (all default to 1). d0..d4 (parallel) index O; d5..d8 are
# reductions.
# Per the assignment below, each step computes
#   O = O + (cast_signed(U, I) - cast_signed(U, IZp))
#         * (cast_signed(U, K) - cast_signed(U, KZp)).
# NOTE: the file header says this file is generated from core_named_ops.py;
# the `Layout:` section added to `doc` must be mirrored in that docstring or it
# will be lost on regeneration.
metadata: !LinalgOpMetadata
  name: conv_3d_ndhwc_dhwcf_q
  cpp_class_name: Conv3DNdhwcDhwcfQOp
  doc: |-
    Performs 3-D convolution with zero point offsets.

    Layout:
      * Input: NDHWC.
      * Kernel: DHWCF.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output. This includes the zero
    point offsets common to quantized operations.
  implements:
  - LinalgConvolutionOpInterface
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
      s13, s14] -> (s0, s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9 * s10 + s11 * s12,
      s13)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
      s13, s14] -> (s3, s7, s11, s13, s14)>
  # Scalar operands carry no shape_map; their indexing maps below are empty.
  - !LinalgOperandDefConfig
    name: IZp
    kind: scalar
    type_var: I32
  - !LinalgOperandDefConfig
    name: KZp
    kind: scalar
    type_var: I32
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
      s13, s14] -> (s0, s1, s5, s9, s14)>
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
      s12, s13, s14] -> (s2, s6, s10)>
    default_indices:
    - 1
    - 1
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
      s12, s13, s14] -> (s4, s8, s12)>
    default_indices:
    - 1
    - 1
    - 1
  indexing_maps: !LinalgIndexingMapsConfig
    # Maps are listed in operand order: I, K, IZp, KZp, O.
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6,
      s7, s8, s9, s10, s11, s12, s13, s14] -> (d0, d1 * s2 + d5 * s4, d2 * s6 + d6
      * s8, d3 * s10 + d7 * s12, d8)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6,
      s7, s8, s9, s10, s11, s12, s13, s14] -> (d5, d6, d7, d8, d4)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6,
      s7, s8, s9, s10, s11, s12, s13, s14] -> ()>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6,
      s7, s8, s9, s10, s11, s12, s13, s14] -> ()>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6,
      s7, s8, s9, s10, s11, s12, s13, s14] -> (d0, d1, d2, d3, d4)>
  # One entry per iteration dimension: d0 .. d8.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  - reduction
  - reduction
  assignments:
  # O = add(O, mul(sub(cast(I), cast(IZp)), sub(cast(K), cast(KZp)))),
  # where cast is cast_signed to U.
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: binary
                fn_name: sub
                operands:
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: I
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: IZp
            - !ScalarExpression
              scalar_fn:
                kind: binary
                fn_name: sub
                operands:
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: K
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: KZp
--- !LinalgOpConfig
# Depth-wise 1-D convolution. Going by the op name, the input layout is "nwc"
# and the kernel layout is "wc" (no channel-multiplier dimension, see `doc`).
metadata: !LinalgOpMetadata
  name: depthwise_conv_1d_nwc_wc
  cpp_class_name: DepthwiseConv1DNwcWcOp
  doc: |-
    Performs depth-wise 1-D convolution.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output. Multiplier is set to 1
    which is a special case for most depthwise convolutions.
  implements:
  - LinalgConvolutionOpInterface
structured_op: !LinalgStructuredOpConfig
  # Shape symbols: s0, s1, s5 are the extents of O; s3 is the leading extent of
  # K; s2 is bound to `strides` and s4 to `dilations`. The second extent of I
  # is expressed as s1 * s2 + s3 * s4.
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1 * s2 + s3 * s4, s5)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s3, s5)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1, s5)>
  # Single-element index attributes; both default to 1.
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s2)>
    default_indices:
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s4)>
    default_indices:
    - 1
  # One map per tensor operand, in `args` order: I, K, O.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1 * s2 + d3 * s4,
      d2)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d3, d2)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1, d2)>
  # d0, d1, d2 index O and are parallel; d3 only indexes I and K and is the
  # reduction dimension.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - reduction
  # O = O + cast_signed<U>(I) * cast_signed<U>(K)
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: I
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: K
--- !LinalgOpConfig
# Depth-wise 1-D convolution whose kernel and output carry one extra trailing
# dimension (s6 / d3) compared with the input. Going by the op name, layouts
# are "nwc" for the input and "wcm" for the kernel.
metadata: !LinalgOpMetadata
  name: depthwise_conv_1d_nwc_wcm
  cpp_class_name: DepthwiseConv1DNwcWcmOp
  doc: |-
    Performs depth-wise 1-D convolution.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
structured_op: !LinalgStructuredOpConfig
  # Shape symbols: s0, s1, s5, s6 are the extents of O; s3 is the leading
  # extent of K; s2 is bound to `strides` and s4 to `dilations`.
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s0, s1 * s2 + s3 * s4,
      s5)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s3, s5, s6)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s0, s1, s5, s6)>
  # Single-element index attributes; both default to 1.
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s2)>
    default_indices:
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s4)>
    default_indices:
    - 1
  # One map per tensor operand, in `args` order: I, K, O. d3 appears only in
  # the K and O maps.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4)[s0, s1, s2, s3, s4, s5, s6] -> (d0, d1 * s2
      + d4 * s4, d2)>
    - affine_map<(d0, d1, d2, d3, d4)[s0, s1, s2, s3, s4, s5, s6] -> (d4, d2, d3)>
    - affine_map<(d0, d1, d2, d3, d4)[s0, s1, s2, s3, s4, s5, s6] -> (d0, d1, d2,
      d3)>
  # d0..d3 index O and are parallel; d4 is the reduction dimension.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  # O = O + cast_signed<U>(I) * cast_signed<U>(K)
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: I
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: K
--- !LinalgOpConfig
# Depth-wise 2-D convolution. Going by the op name, the input layout is "nhwc"
# and the kernel layout is "hwc" (no channel-multiplier dimension, see `doc`).
metadata: !LinalgOpMetadata
  name: depthwise_conv_2d_nhwc_hwc
  cpp_class_name: DepthwiseConv2DNhwcHwcOp
  doc: |-
    Performs depth-wise 2-D convolution.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output. Multiplier is set to 1
    which is a special case for most depthwise convolutions.
  implements:
  - LinalgConvolutionOpInterface
structured_op: !LinalgStructuredOpConfig
  # Shape symbols: s0, s1, s5, s9 are the extents of O; s3, s7 are the two
  # leading extents of K; (s2, s6) are bound to `strides` and (s4, s8) to
  # `dilations`.
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1 *
      s2 + s3 * s4, s5 * s6 + s7 * s8, s9)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s3, s7, s9)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1, s5,
      s9)>
  # Two-element index attributes; every element defaults to 1.
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s2,
      s6)>
    default_indices:
    - 1
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s4,
      s8)>
    default_indices:
    - 1
    - 1
  # One map per tensor operand, in `args` order: I, K, O.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d0, d1 * s2 + d4 * s4, d2 * s6 + d5 * s8, d3)>
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d4, d5, d3)>
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d0, d1, d2, d3)>
  # d0..d3 index O and are parallel; d4, d5 are the reduction dimensions.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  # O = O + cast_signed<U>(I) * cast_signed<U>(K)
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: I
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: K
--- !LinalgOpConfig
# Depth-wise 2-D convolution. Going by the op name, the input layout is "nchw"
# and the kernel layout is "chw" (no channel-multiplier dimension, see `doc`).
metadata: !LinalgOpMetadata
  name: depthwise_conv_2d_nchw_chw
  cpp_class_name: DepthwiseConv2DNchwChwOp
  doc: |-
    Performs depth-wise 2-D convolution.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output. Multiplier is set to 1
    which is a special case for most depthwise convolutions.
  implements:
  - LinalgConvolutionOpInterface
structured_op: !LinalgStructuredOpConfig
  # Shape symbols: s0, s1, s2, s6 are the extents of O; s1, s4, s8 are the
  # extents of K; (s3, s7) are bound to `strides` and (s5, s9) to `dilations`.
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1, s2
      * s3 + s4 * s5, s6 * s7 + s8 * s9)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s1, s4, s8)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1, s2,
      s6)>
  # Two-element index attributes; every element defaults to 1.
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s3,
      s7)>
    default_indices:
    - 1
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s5,
      s9)>
    default_indices:
    - 1
    - 1
  # One map per tensor operand, in `args` order: I, K, O. Note that d3 indexes
  # the second dimension of I and O and the first dimension of K.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d0, d3, d1 * s3 + d4 * s5, d2 * s7 + d5 * s9)>
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d3, d4, d5)>
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d0, d3, d1, d2)>
  # d0..d3 index O and are parallel; d4, d5 are the reduction dimensions.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  # O = O + cast_signed<U>(I) * cast_signed<U>(K)
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: I
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: K
--- !LinalgOpConfig
# Depth-wise 2-D convolution with two extra I32 scalar operands, IZp and KZp,
# that are subtracted from the input and kernel values before the multiply.
# Going by the op name, layouts are "nhwc" for the input and "hwc" for the
# kernel.
metadata: !LinalgOpMetadata
  name: depthwise_conv_2d_nhwc_hwc_q
  cpp_class_name: DepthwiseConv2DNhwcHwcQOp
  doc: |-
    Performs depth-wise 2-D convolution.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
structured_op: !LinalgStructuredOpConfig
  # Shape symbols: s0, s1, s5, s9 are the extents of O; s3, s7 are the two
  # leading extents of K; (s2, s6) are bound to `strides` and (s4, s8) to
  # `dilations`.
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1 *
      s2 + s3 * s4, s5 * s6 + s7 * s8, s9)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s3, s7, s9)>
  # Scalar operands: no shape_map, and their indexing maps below have no
  # results.
  - !LinalgOperandDefConfig
    name: IZp
    kind: scalar
    type_var: I32
  - !LinalgOperandDefConfig
    name: KZp
    kind: scalar
    type_var: I32
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1, s5,
      s9)>
  # Two-element index attributes; every element defaults to 1.
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s2,
      s6)>
    default_indices:
    - 1
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s4,
      s8)>
    default_indices:
    - 1
    - 1
  # One map per non-attribute operand, in `args` order: I, K, IZp, KZp, O.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d0, d1 * s2 + d4 * s4, d2 * s6 + d5 * s8, d3)>
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d4, d5, d3)>
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> ()>
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> ()>
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d0, d1, d2, d3)>
  # d0..d3 index O and are parallel; d4, d5 are the reduction dimensions.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  # O = O + (cast_signed<U>(I) - cast_signed<U>(IZp))
  #       * (cast_signed<U>(K) - cast_signed<U>(KZp))
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: binary
                fn_name: sub
                operands:
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: I
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: IZp
            - !ScalarExpression
              scalar_fn:
                kind: binary
                fn_name: sub
                operands:
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: K
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: KZp
--- !LinalgOpConfig
# Depth-wise 2-D convolution whose kernel and output carry one extra trailing
# dimension (s10 / d4) compared with the input. Going by the op name, layouts
# are "nhwc" for the input and "hwcm" for the kernel.
metadata: !LinalgOpMetadata
  name: depthwise_conv_2d_nhwc_hwcm
  cpp_class_name: DepthwiseConv2DNhwcHwcmOp
  doc: |-
    Performs depth-wise 2-D convolution.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
structured_op: !LinalgStructuredOpConfig
  # Shape symbols: s0, s1, s5, s9, s10 are the extents of O; s3, s7 are the two
  # leading extents of K; (s2, s6) are bound to `strides` and (s4, s8) to
  # `dilations`.
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s0,
      s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s3,
      s7, s9, s10)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s0,
      s1, s5, s9, s10)>
  # Two-element index attributes; every element defaults to 1.
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] ->
      (s2, s6)>
    default_indices:
    - 1
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] ->
      (s4, s8)>
    default_indices:
    - 1
    - 1
  # One map per tensor operand, in `args` order: I, K, O. d4 appears only in
  # the K and O maps.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
      s9, s10] -> (d0, d1 * s2 + d5 * s4, d2 * s6 + d6 * s8, d3)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
      s9, s10] -> (d5, d6, d3, d4)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
      s9, s10] -> (d0, d1, d2, d3, d4)>
  # d0..d4 index O and are parallel; d5, d6 are the reduction dimensions.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  # O = O + cast_signed<U>(I) * cast_signed<U>(K)
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: I
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: K
--- !LinalgOpConfig
# Depth-wise 2-D convolution with an extra trailing kernel/output dimension
# (s10 / d4) and two I32 scalar operands, IZp and KZp, that are subtracted from
# the input and kernel values before the multiply. Going by the op name,
# layouts are "nhwc" for the input and "hwcm" for the kernel.
metadata: !LinalgOpMetadata
  name: depthwise_conv_2d_nhwc_hwcm_q
  cpp_class_name: DepthwiseConv2DNhwcHwcmQOp
  doc: |-
    Performs depth-wise 2-D convolution.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
structured_op: !LinalgStructuredOpConfig
  # Shape symbols: s0, s1, s5, s9, s10 are the extents of O; s3, s7 are the two
  # leading extents of K; (s2, s6) are bound to `strides` and (s4, s8) to
  # `dilations`.
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s0,
      s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s3,
      s7, s9, s10)>
  # Scalar operands: no shape_map, and their indexing maps below have no
  # results.
  - !LinalgOperandDefConfig
    name: IZp
    kind: scalar
    type_var: I32
  - !LinalgOperandDefConfig
    name: KZp
    kind: scalar
    type_var: I32
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s0,
      s1, s5, s9, s10)>
  # Two-element index attributes; every element defaults to 1.
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] ->
      (s2, s6)>
    default_indices:
    - 1
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] ->
      (s4, s8)>
    default_indices:
    - 1
    - 1
  # One map per non-attribute operand, in `args` order: I, K, IZp, KZp, O.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
      s9, s10] -> (d0, d1 * s2 + d5 * s4, d2 * s6 + d6 * s8, d3)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
      s9, s10] -> (d5, d6, d3, d4)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
      s9, s10] -> ()>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
      s9, s10] -> ()>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
      s9, s10] -> (d0, d1, d2, d3, d4)>
  # d0..d4 index O and are parallel; d5, d6 are the reduction dimensions.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  # O = O + (cast_signed<U>(I) - cast_signed<U>(IZp))
  #       * (cast_signed<U>(K) - cast_signed<U>(KZp))
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: binary
                fn_name: sub
                operands:
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: I
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: IZp
            - !ScalarExpression
              scalar_fn:
                kind: binary
                fn_name: sub
                operands:
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: K
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: KZp
--- !LinalgOpConfig
# Depth-wise 3-D convolution. Going by the op name, the input layout is
# "ndhwc" and the kernel layout is "dhwc" (no channel-multiplier dimension,
# see `doc`).
metadata: !LinalgOpMetadata
  name: depthwise_conv_3d_ndhwc_dhwc
  cpp_class_name: DepthwiseConv3DNdhwcDhwcOp
  doc: |-
    Performs depth-wise 3-D convolution.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output. Multiplier is set to 1
    which is a special case for most depthwise convolutions.
  implements:
  - LinalgConvolutionOpInterface
structured_op: !LinalgStructuredOpConfig
  # Shape symbols: s0, s1, s5, s9, s13 are the extents of O; s3, s7, s11 are
  # the three leading extents of K; (s2, s6, s10) are bound to `strides` and
  # (s4, s8, s12) to `dilations`.
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
      s13] -> (s0, s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9 * s10 + s11 * s12, s13)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
      s13] -> (s3, s7, s11, s13)>
  # O is indexed with five results (d0, d1, d2, d3, d7) in indexing_maps, so
  # its shape lists five extents; the last one, s13, is the same trailing
  # extent that I and K use for d7.
  # NOTE(review): this file is autogenerated from core_named_ops.py; the
  # definition of O there presumably needs the same trailing extent - confirm.
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
      s13] -> (s0, s1, s5, s9, s13)>
  # Three-element index attributes; every element defaults to 1.
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
      s12, s13] -> (s2, s6, s10)>
    default_indices:
    - 1
    - 1
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
      s12, s13] -> (s4, s8, s12)>
    default_indices:
    - 1
    - 1
    - 1
  # One map per tensor operand, in `args` order: I, K, O.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
      s8, s9, s10, s11, s12, s13] -> (d0, d1 * s2 + d4 * s4, d2 * s6 + d5 * s8, d3
      * s10 + d6 * s12, d7)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
      s8, s9, s10, s11, s12, s13] -> (d4, d5, d6, d7)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
      s8, s9, s10, s11, s12, s13] -> (d0, d1, d2, d3, d7)>
  # d0..d3 and d7 index O and are parallel; d4, d5, d6 are the reduction
  # dimensions. Unlike the 1-D/2-D variants, the last parallel dimension (d7)
  # comes after the reduction dimensions.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  - reduction
  - parallel
  # O = O + cast_signed<U>(I) * cast_signed<U>(K)
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: I
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: K
--- !LinalgOpConfig
# Depth-wise 3-D convolution whose kernel and output carry one extra trailing
# dimension (s14 / d4) compared with the input. Going by the op name, layouts
# are "ndhwc" for the input and "dhwcm" for the kernel.
metadata: !LinalgOpMetadata
  name: depthwise_conv_3d_ndhwc_dhwcm
  cpp_class_name: DepthwiseConv3DNdhwcDhwcmOp
  doc: |-
    Performs depth-wise 3-D convolution.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
structured_op: !LinalgStructuredOpConfig
  # Shape symbols: s0, s1, s5, s9, s13, s14 are the extents of O; s3, s7, s11
  # are the three leading extents of K; (s2, s6, s10) are bound to `strides`
  # and (s4, s8, s12) to `dilations`.
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
      s13, s14] -> (s0, s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9 * s10 + s11 * s12,
      s13)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
      s13, s14] -> (s3, s7, s11, s13, s14)>
  # O is indexed with six results (d0, d1, d2, d3, d8, d4) in indexing_maps, so
  # its shape lists six extents; s13 and s14 are the same two trailing extents
  # that K uses for (d8, d4).
  # NOTE(review): this file is autogenerated from core_named_ops.py; the
  # definition of O there presumably needs the same extra extent - confirm.
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
      s13, s14] -> (s0, s1, s5, s9, s13, s14)>
  # Three-element index attributes; every element defaults to 1.
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
      s12, s13, s14] -> (s2, s6, s10)>
    default_indices:
    - 1
    - 1
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
      s12, s13, s14] -> (s4, s8, s12)>
    default_indices:
    - 1
    - 1
    - 1
  # One map per tensor operand, in `args` order: I, K, O. d4 appears only in
  # the K and O maps.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6,
      s7, s8, s9, s10, s11, s12, s13, s14] -> (d0, d1 * s2 + d5 * s4, d2 * s6 + d6
      * s8, d3 * s10 + d7 * s12, d8)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6,
      s7, s8, s9, s10, s11, s12, s13, s14] -> (d5, d6, d7, d8, d4)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6,
      s7, s8, s9, s10, s11, s12, s13, s14] -> (d0, d1, d2, d3, d8, d4)>
  # d0..d4 and d8 index O and are parallel; d5, d6, d7 are the reduction
  # dimensions. The last parallel dimension (d8) comes after the reduction
  # dimensions.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  - reduction
  - parallel
  # O = O + cast_signed<U>(I) * cast_signed<U>(K)
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: I
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: K
--- !LinalgOpConfig
# Sum pooling over an NHWC input with an HW kernel (layouts per `doc`). The
# kernel operand K is indexed by the reduction dimensions but its values are
# never read by the assignment below.
metadata: !LinalgOpMetadata
  name: pooling_nhwc_sum
  cpp_class_name: PoolingNhwcSumOp
  doc: |-
    Performs sum pooling.

    Layout:
      * Input: NHWC.
      * Kernel: HW.

    Numeric casting is performed on the input operand, promoting it to the same
    data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
structured_op: !LinalgStructuredOpConfig
  # Shape symbols: s0, s1, s5, s9 are the extents of O; s3, s7 are the extents
  # of K; (s2, s6) are bound to `strides` and (s4, s8) to `dilations`.
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1 *
      s2 + s3 * s4, s5 * s6 + s7 * s8, s9)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s3, s7)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1, s5,
      s9)>
  # Two-element index attributes; every element defaults to 1.
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s2,
      s6)>
    default_indices:
    - 1
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s4,
      s8)>
    default_indices:
    - 1
    - 1
  # One map per tensor operand, in `args` order: I, K, O.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d0, d1 * s2 + d4 * s4, d2 * s6 + d5 * s8, d3)>
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d4, d5)>
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d0, d1, d2, d3)>
  # d0..d3 index O and are parallel; d4, d5 are the reduction dimensions.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  # O = O + cast_signed<U>(I)
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: type
            fn_name: cast_signed
            type_var: U
            operands:
            - !ScalarExpression
              scalar_arg: I
--- !LinalgOpConfig
# Sum pooling over an NCHW input with an HW kernel (layouts per `doc`). The
# kernel operand K is indexed by the reduction dimensions but its values are
# never read by the assignment below.
metadata: !LinalgOpMetadata
  name: pooling_nchw_sum
  cpp_class_name: PoolingNchwSumOp
  doc: |-
    Performs sum pooling.

    Layout:
      * Input: NCHW.
      * Kernel: HW.

    Numeric casting is performed on the input operand, promoting it to the same
    data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
structured_op: !LinalgStructuredOpConfig
  # Shape symbols: s0, s1, s2, s6 are the extents of O; s4, s8 are the extents
  # of K; (s3, s7) are bound to `strides` and (s5, s9) to `dilations`.
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1, s2
      * s3 + s4 * s5, s6 * s7 + s8 * s9)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s4, s8)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1, s2,
      s6)>
  # Two-element index attributes; every element defaults to 1.
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s3,
      s7)>
    default_indices:
    - 1
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s5,
      s9)>
    default_indices:
    - 1
    - 1
  # One map per tensor operand, in `args` order: I, K, O.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d0, d1, d2 * s3 + d4 * s5, d3 * s7 + d5 * s9)>
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d4, d5)>
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d0, d1, d2, d3)>
  # d0..d3 index O and are parallel; d4, d5 are the reduction dimensions.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  # O = O + cast_signed<U>(I)
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: type
            fn_name: cast_signed
            type_var: U
            operands:
            - !ScalarExpression
              scalar_arg: I
--- !LinalgOpConfig
# pooling_nhwc_max: signed max reduction of the input over a 2-D window
# applied to input dims 1 and 2.
metadata: !LinalgOpMetadata
  name: pooling_nhwc_max
  cpp_class_name: PoolingNhwcMaxOp
  doc: |-
    Performs max pooling.

    Numeric casting is performed on the input operand, promoting it to the same
    data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
# Symbol legend, read off the operand maps below:
#   s0     : leading dim shared by I and O
#   s1, s5 : output extents of the two pooled dims
#   s2, s6 : strides          s4, s8 : dilations
#   s3, s7 : window extents, taken from the shape of K
#   s9     : trailing dim shared by I and O
# K only contributes its shape; the assignment never reads its values.
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1 *
      s2 + s3 * s4, s5 * s6 + s7 * s8, s9)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s3, s7)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1, s5,
      s9)>
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s2,
      s6)>
    default_indices:
    - 1
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s4,
      s8)>
    default_indices:
    - 1
    - 1
  # Iteration space: d0-d3 index O (parallel); d4, d5 walk the window
  # (reduction). The three maps are listed in operand order: I, K, O.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d0, d1 * s2 + d4 * s4, d2 * s6 + d5 * s8, d3)>
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d4, d5)>
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d0, d1, d2, d3)>
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  # Scalar body: O = max_signed(O, cast_signed<U>(I)).
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: max_signed
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: type
            fn_name: cast_signed
            type_var: U
            operands:
            - !ScalarExpression
              scalar_arg: I
--- !LinalgOpConfig
# pooling_nhwc_max_unsigned: unsigned max reduction of the input over a 2-D
# window applied to input dims 1 and 2.
metadata: !LinalgOpMetadata
  name: pooling_nhwc_max_unsigned
  cpp_class_name: PoolingNhwcMaxUnsignedOp
  doc: |-
    Performs unsigned max pooling.

    Numeric casting is performed on the input operand, promoting it to the same
    data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
# Symbol legend, read off the operand maps below:
#   s0     : leading dim shared by I and O
#   s1, s5 : output extents of the two pooled dims
#   s2, s6 : strides          s4, s8 : dilations
#   s3, s7 : window extents, taken from the shape of K
#   s9     : trailing dim shared by I and O
# K only contributes its shape; the assignment never reads its values.
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1 *
      s2 + s3 * s4, s5 * s6 + s7 * s8, s9)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s3, s7)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1, s5,
      s9)>
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s2,
      s6)>
    default_indices:
    - 1
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s4,
      s8)>
    default_indices:
    - 1
    - 1
  # Iteration space: d0-d3 index O (parallel); d4, d5 walk the window
  # (reduction). The three maps are listed in operand order: I, K, O.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d0, d1 * s2 + d4 * s4, d2 * s6 + d5 * s8, d3)>
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d4, d5)>
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d0, d1, d2, d3)>
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  # Scalar body: O = max_unsigned(O, cast_unsigned<U>(I)); both the cast and
  # the comparison use the unsigned variants.
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: max_unsigned
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: type
            fn_name: cast_unsigned
            type_var: U
            operands:
            - !ScalarExpression
              scalar_arg: I
--- !LinalgOpConfig
# pooling_nchw_max: signed max reduction of the input over a 2-D window
# applied to the last two input dims.
metadata: !LinalgOpMetadata
  name: pooling_nchw_max
  cpp_class_name: PoolingNchwMaxOp
  doc: |-
    Performs max pooling.

    Numeric casting is performed on the input operand, promoting it to the same
    data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
# Symbol legend, read off the operand maps below:
#   s0, s1 : leading dims shared by I and O
#   s2, s6 : output extents of the two pooled dims
#   s3, s7 : strides          s5, s9 : dilations
#   s4, s8 : window extents, taken from the shape of K
# K only contributes its shape; the assignment never reads its values.
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1, s2
      * s3 + s4 * s5, s6 * s7 + s8 * s9)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s4, s8)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1, s2,
      s6)>
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s3,
      s7)>
    default_indices:
    - 1
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s5,
      s9)>
    default_indices:
    - 1
    - 1
  # Iteration space: d0-d3 index O (parallel); d4, d5 walk the window
  # (reduction). The three maps are listed in operand order: I, K, O.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d0, d1, d2 * s3 + d4 * s5, d3 * s7 + d5 * s9)>
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d4, d5)>
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d0, d1, d2, d3)>
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  # Scalar body: O = max_signed(O, cast_signed<U>(I)).
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: max_signed
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: type
            fn_name: cast_signed
            type_var: U
            operands:
            - !ScalarExpression
              scalar_arg: I
--- !LinalgOpConfig
# pooling_nhwc_min: signed min reduction of the input over a 2-D window
# applied to input dims 1 and 2.
metadata: !LinalgOpMetadata
  name: pooling_nhwc_min
  cpp_class_name: PoolingNhwcMinOp
  doc: |-
    Performs min pooling.

    Numeric casting is performed on the input operand, promoting it to the same
    data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
# Symbol legend, read off the operand maps below:
#   s0     : leading dim shared by I and O
#   s1, s5 : output extents of the two pooled dims
#   s2, s6 : strides          s4, s8 : dilations
#   s3, s7 : window extents, taken from the shape of K
#   s9     : trailing dim shared by I and O
# K only contributes its shape; the assignment never reads its values.
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1 *
      s2 + s3 * s4, s5 * s6 + s7 * s8, s9)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s3, s7)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1, s5,
      s9)>
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s2,
      s6)>
    default_indices:
    - 1
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s4,
      s8)>
    default_indices:
    - 1
    - 1
  # Iteration space: d0-d3 index O (parallel); d4, d5 walk the window
  # (reduction). The three maps are listed in operand order: I, K, O.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d0, d1 * s2 + d4 * s4, d2 * s6 + d5 * s8, d3)>
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d4, d5)>
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d0, d1, d2, d3)>
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  # Scalar body: O = min_signed(O, cast_signed<U>(I)).
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: min_signed
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: type
            fn_name: cast_signed
            type_var: U
            operands:
            - !ScalarExpression
              scalar_arg: I
--- !LinalgOpConfig
# pooling_nhwc_min_unsigned: unsigned min reduction of the input over a 2-D
# window applied to input dims 1 and 2.
metadata: !LinalgOpMetadata
  name: pooling_nhwc_min_unsigned
  cpp_class_name: PoolingNhwcMinUnsignedOp
  doc: |-
    Performs unsigned min pooling.

    Numeric casting is performed on the input operand, promoting it to the same
    data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
# Symbol legend, read off the operand maps below:
#   s0     : leading dim shared by I and O
#   s1, s5 : output extents of the two pooled dims
#   s2, s6 : strides          s4, s8 : dilations
#   s3, s7 : window extents, taken from the shape of K
#   s9     : trailing dim shared by I and O
# K only contributes its shape; the assignment never reads its values.
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1 *
      s2 + s3 * s4, s5 * s6 + s7 * s8, s9)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s3, s7)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1, s5,
      s9)>
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s2,
      s6)>
    default_indices:
    - 1
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s4,
      s8)>
    default_indices:
    - 1
    - 1
  # Iteration space: d0-d3 index O (parallel); d4, d5 walk the window
  # (reduction). The three maps are listed in operand order: I, K, O.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d0, d1 * s2 + d4 * s4, d2 * s6 + d5 * s8, d3)>
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d4, d5)>
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d0, d1, d2, d3)>
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  # Scalar body: O = min_unsigned(O, cast_unsigned<U>(I)); both the cast and
  # the comparison use the unsigned variants.
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: min_unsigned
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: type
            fn_name: cast_unsigned
            type_var: U
            operands:
            - !ScalarExpression
              scalar_arg: I
--- !LinalgOpConfig
# pooling_nwc_sum: accumulates (add) the input over a 1-D window applied to
# input dim 1; see the doc string for the documented layout.
metadata: !LinalgOpMetadata
  name: pooling_nwc_sum
  cpp_class_name: PoolingNwcSumOp
  doc: |-
    Performs sum pooling.

    Layout:
      * Input: NWC.
      * Kernel: W.

    Numeric casting is performed on the input operand, promoting it to the same
    data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
# Symbol legend, read off the operand maps below:
#   s0 : leading dim shared by I and O (N in the documented layout)
#   s1 : output extent of the pooled dim
#   s2 : stride               s4 : dilation
#   s3 : window extent, taken from the shape of K
#   s5 : trailing dim shared by I and O (C in the documented layout)
# K only contributes its shape; the assignment never reads its values.
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1 * s2 + s3 * s4, s5)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s3)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1, s5)>
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s2)>
    default_indices:
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s4)>
    default_indices:
    - 1
  # Iteration space: d0-d2 index O (parallel); d3 walks the window
  # (reduction). The three maps are listed in operand order: I, K, O.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1 * s2 + d3 * s4,
      d2)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d3)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1, d2)>
  iterator_types:
  - parallel
  - parallel
  - parallel
  - reduction
  # Scalar body: O = add(O, cast_signed<U>(I)).
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: type
            fn_name: cast_signed
            type_var: U
            operands:
            - !ScalarExpression
              scalar_arg: I
--- !LinalgOpConfig
# pooling_ncw_sum: accumulates (add) the input over a 1-D window applied to
# the last input dim; see the doc string for the documented layout.
metadata: !LinalgOpMetadata
  name: pooling_ncw_sum
  cpp_class_name: PoolingNcwSumOp
  doc: |-
    Performs sum pooling.

    Layout:
      * Input: NCW.
      * Kernel: W.

    Numeric casting is performed on the input operand, promoting it to the same
    data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
# Symbol legend, read off the operand maps below:
#   s0, s1 : leading dims shared by I and O (N and C in the documented layout)
#   s2     : output extent of the pooled dim
#   s3     : stride           s5 : dilation
#   s4     : window extent, taken from the shape of K
# K only contributes its shape; the assignment never reads its values.
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1, s2 * s3 + s4 * s5)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s4)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1, s2)>
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s3)>
    default_indices:
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s5)>
    default_indices:
    - 1
  # Iteration space: d0-d2 index O (parallel); d3 walks the window
  # (reduction). The three maps are listed in operand order: I, K, O.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1, d2 * s3 + d3
      * s5)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d3)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1, d2)>
  iterator_types:
  - parallel
  - parallel
  - parallel
  - reduction
  # Scalar body: O = add(O, cast_signed<U>(I)).
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: type
            fn_name: cast_signed
            type_var: U
            operands:
            - !ScalarExpression
              scalar_arg: I
--- !LinalgOpConfig
# pooling_nwc_max: signed max reduction of the input over a 1-D window
# applied to input dim 1.
metadata: !LinalgOpMetadata
  name: pooling_nwc_max
  cpp_class_name: PoolingNwcMaxOp
  doc: |-
    Performs max pooling.

    Numeric casting is performed on the input operand, promoting it to the same
    data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
# Symbol legend, read off the operand maps below:
#   s0 : leading dim shared by I and O
#   s1 : output extent of the pooled dim
#   s2 : stride               s4 : dilation
#   s3 : window extent, taken from the shape of K
#   s5 : trailing dim shared by I and O
# K only contributes its shape; the assignment never reads its values.
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1 * s2 + s3 * s4, s5)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s3)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1, s5)>
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s2)>
    default_indices:
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s4)>
    default_indices:
    - 1
  # Iteration space: d0-d2 index O (parallel); d3 walks the window
  # (reduction). The three maps are listed in operand order: I, K, O.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1 * s2 + d3 * s4,
      d2)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d3)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1, d2)>
  iterator_types:
  - parallel
  - parallel
  - parallel
  - reduction
  # Scalar body: O = max_signed(O, cast_signed<U>(I)).
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: max_signed
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: type
            fn_name: cast_signed
            type_var: U
            operands:
            - !ScalarExpression
              scalar_arg: I
--- !LinalgOpConfig
# pooling_nwc_max_unsigned: unsigned max reduction of the input over a 1-D
# window applied to input dim 1.
metadata: !LinalgOpMetadata
  name: pooling_nwc_max_unsigned
  cpp_class_name: PoolingNwcMaxUnsignedOp
  doc: |-
    Performs unsigned max pooling.

    Numeric casting is performed on the input operand, promoting it to the same
    data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
# Symbol legend, read off the operand maps below:
#   s0 : leading dim shared by I and O
#   s1 : output extent of the pooled dim
#   s2 : stride               s4 : dilation
#   s3 : window extent, taken from the shape of K
#   s5 : trailing dim shared by I and O
# K only contributes its shape; the assignment never reads its values.
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1 * s2 + s3 * s4, s5)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s3)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1, s5)>
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s2)>
    default_indices:
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s4)>
    default_indices:
    - 1
  # Iteration space: d0-d2 index O (parallel); d3 walks the window
  # (reduction). The three maps are listed in operand order: I, K, O.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1 * s2 + d3 * s4,
      d2)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d3)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1, d2)>
  iterator_types:
  - parallel
  - parallel
  - parallel
  - reduction
  # Scalar body: O = max_unsigned(O, cast_unsigned<U>(I)); both the cast and
  # the comparison use the unsigned variants.
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: max_unsigned
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: type
            fn_name: cast_unsigned
            type_var: U
            operands:
            - !ScalarExpression
              scalar_arg: I
--- !LinalgOpConfig
# pooling_ncw_max: signed max reduction of the input over a 1-D window
# applied to the last input dim.
metadata: !LinalgOpMetadata
  name: pooling_ncw_max
  cpp_class_name: PoolingNcwMaxOp
  doc: |-
    Performs max pooling.

    Numeric casting is performed on the input operand, promoting it to the same
    data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
# Symbol legend, read off the operand maps below:
#   s0, s1 : leading dims shared by I and O
#   s2     : output extent of the pooled dim
#   s3     : stride           s5 : dilation
#   s4     : window extent, taken from the shape of K
# K only contributes its shape; the assignment never reads its values.
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1, s2 * s3 + s4 * s5)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s4)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1, s2)>
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s3)>
    default_indices:
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s5)>
    default_indices:
    - 1
  # Iteration space: d0-d2 index O (parallel); d3 walks the window
  # (reduction). The three maps are listed in operand order: I, K, O.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1, d2 * s3 + d3
      * s5)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d3)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1, d2)>
  iterator_types:
  - parallel
  - parallel
  - parallel
  - reduction
  # Scalar body: O = max_signed(O, cast_signed<U>(I)).
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: max_signed
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: type
            fn_name: cast_signed
            type_var: U
            operands:
            - !ScalarExpression
              scalar_arg: I
--- !LinalgOpConfig
# pooling_nwc_min: signed min reduction of the input over a 1-D window
# applied to input dim 1.
metadata: !LinalgOpMetadata
  name: pooling_nwc_min
  cpp_class_name: PoolingNwcMinOp
  doc: |-
    Performs min pooling.

    Numeric casting is performed on the input operand, promoting it to the same
    data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
# Symbol legend, read off the operand maps below:
#   s0 : leading dim shared by I and O
#   s1 : output extent of the pooled dim
#   s2 : stride               s4 : dilation
#   s3 : window extent, taken from the shape of K
#   s5 : trailing dim shared by I and O
# K only contributes its shape; the assignment never reads its values.
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1 * s2 + s3 * s4, s5)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s3)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1, s5)>
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s2)>
    default_indices:
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s4)>
    default_indices:
    - 1
  # Iteration space: d0-d2 index O (parallel); d3 walks the window
  # (reduction). The three maps are listed in operand order: I, K, O.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1 * s2 + d3 * s4,
      d2)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d3)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1, d2)>
  iterator_types:
  - parallel
  - parallel
  - parallel
  - reduction
  # Scalar body: O = min_signed(O, cast_signed<U>(I)).
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: min_signed
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: type
            fn_name: cast_signed
            type_var: U
            operands:
            - !ScalarExpression
              scalar_arg: I
--- !LinalgOpConfig
# pooling_nwc_min_unsigned: unsigned min reduction of the input over a 1-D
# window applied to input dim 1.
metadata: !LinalgOpMetadata
  name: pooling_nwc_min_unsigned
  cpp_class_name: PoolingNwcMinUnsignedOp
  doc: |-
    Performs unsigned min pooling.

    Numeric casting is performed on the input operand, promoting it to the same
    data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
# Symbol legend, read off the operand maps below:
#   s0 : leading dim shared by I and O
#   s1 : output extent of the pooled dim
#   s2 : stride               s4 : dilation
#   s3 : window extent, taken from the shape of K
#   s5 : trailing dim shared by I and O
# K only contributes its shape; the assignment never reads its values.
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1 * s2 + s3 * s4, s5)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s3)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1, s5)>
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s2)>
    default_indices:
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s4)>
    default_indices:
    - 1
  # Iteration space: d0-d2 index O (parallel); d3 walks the window
  # (reduction). The three maps are listed in operand order: I, K, O.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1 * s2 + d3 * s4,
      d2)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d3)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1, d2)>
  iterator_types:
  - parallel
  - parallel
  - parallel
  - reduction
  # Scalar body: O = min_unsigned(O, cast_unsigned<U>(I)); both the cast and
  # the comparison use the unsigned variants.
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: min_unsigned
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: type
            fn_name: cast_unsigned
            type_var: U
            operands:
            - !ScalarExpression
              scalar_arg: I
--- !LinalgOpConfig
# pooling_ndhwc_sum: accumulates (add) the input over a 3-D window applied to
# input dims 1, 2 and 3.
metadata: !LinalgOpMetadata
  name: pooling_ndhwc_sum
  cpp_class_name: PoolingNdhwcSumOp
  doc: |-
    Performs 3D sum pooling.

    Numeric casting is performed on the input operand, promoting it to the same
    data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
# Symbol legend, read off the operand maps below:
#   s0          : leading dim shared by I and O
#   s1, s5, s9  : output extents of the three pooled dims
#   s2, s6, s10 : strides     s4, s8, s12 : dilations
#   s3, s7, s11 : window extents, taken from the shape of K
#   s13         : trailing dim shared by I and O
# K only contributes its shape; the assignment never reads its values.
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
      s13] -> (s0, s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9 * s10 + s11 * s12, s13)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
      s13] -> (s3, s7, s11)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
      s13] -> (s0, s1, s5, s9, s13)>
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
      s12, s13] -> (s2, s6, s10)>
    default_indices:
    - 1
    - 1
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
      s12, s13] -> (s4, s8, s12)>
    default_indices:
    - 1
    - 1
    - 1
  # Iteration space: d0-d4 index O (parallel); d5-d7 walk the window
  # (reduction). The three maps are listed in operand order: I, K, O.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
      s8, s9, s10, s11, s12, s13] -> (d0, d1 * s2 + d5 * s4, d2 * s6 + d6 * s8, d3
      * s10 + d7 * s12, d4)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
      s8, s9, s10, s11, s12, s13] -> (d5, d6, d7)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
      s8, s9, s10, s11, s12, s13] -> (d0, d1, d2, d3, d4)>
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  - reduction
  # Scalar body: O = add(O, cast_signed<U>(I)).
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: type
            fn_name: cast_signed
            type_var: U
            operands:
            - !ScalarExpression
              scalar_arg: I
--- !LinalgOpConfig
# pooling_ndhwc_max: signed max reduction of the input over a 3-D window
# applied to input dims 1, 2 and 3.
metadata: !LinalgOpMetadata
  name: pooling_ndhwc_max
  cpp_class_name: PoolingNdhwcMaxOp
  doc: |-
    Performs 3D max pooling.

    Numeric casting is performed on the input operand, promoting it to the same
    data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
# Symbol legend, read off the operand maps below:
#   s0          : leading dim shared by I and O
#   s1, s5, s9  : output extents of the three pooled dims
#   s2, s6, s10 : strides     s4, s8, s12 : dilations
#   s3, s7, s11 : window extents, taken from the shape of K
#   s13         : trailing dim shared by I and O
# K only contributes its shape; the assignment never reads its values.
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
      s13] -> (s0, s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9 * s10 + s11 * s12, s13)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
      s13] -> (s3, s7, s11)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
      s13] -> (s0, s1, s5, s9, s13)>
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
      s12, s13] -> (s2, s6, s10)>
    default_indices:
    - 1
    - 1
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
      s12, s13] -> (s4, s8, s12)>
    default_indices:
    - 1
    - 1
    - 1
  # Iteration space: d0-d4 index O (parallel); d5-d7 walk the window
  # (reduction). The three maps are listed in operand order: I, K, O.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
      s8, s9, s10, s11, s12, s13] -> (d0, d1 * s2 + d5 * s4, d2 * s6 + d6 * s8, d3
      * s10 + d7 * s12, d4)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
      s8, s9, s10, s11, s12, s13] -> (d5, d6, d7)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
      s8, s9, s10, s11, s12, s13] -> (d0, d1, d2, d3, d4)>
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  - reduction
  # Scalar body: O = max_signed(O, cast_signed<U>(I)).
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: max_signed
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: type
            fn_name: cast_signed
            type_var: U
            operands:
            - !ScalarExpression
              scalar_arg: I
--- !LinalgOpConfig
# pooling_ndhwc_min: folds a window of I into each element of O with a signed
# minimum. Symbol roles, as they appear in the maps below:
#   s0, s13      first / last extent, identical on I and O
#   s1, s5, s9   the three middle extents of O
#   s3, s7, s11  the three extents of K (the window)
#   s2, s6, s10  strides
#   s4, s8, s12  dilations
metadata: !LinalgOpMetadata
  name: pooling_ndhwc_min
  cpp_class_name: PoolingNdhwcMinOp
  doc: |-
    Performs 3D min pooling.

    Numeric casting is performed on the input operand, promoting it to the same
    data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
structured_op: !LinalgStructuredOpConfig
  args:
  # Input tensor. Each middle extent has the form
  # (output extent * stride) + (window extent * dilation).
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
      s13] -> (s0, s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9 * s10 + s11 * s12, s13)>
  # Window operand. It is indexed by the reduction dims (d5, d6, d7) but is not
  # referenced by the assignment below, so its element values are not read
  # there. NOTE(review): presumably only its shape matters - confirm.
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
      s13] -> (s3, s7, s11)>
  # Output tensor; also read by the assignment as the running minimum.
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
      s13] -> (s0, s1, s5, s9, s13)>
  # Per-dimension strides bound to symbols s2, s6, s10; each defaults to 1.
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
      s12, s13] -> (s2, s6, s10)>
    default_indices:
    - 1
    - 1
    - 1
  # Per-dimension dilations bound to symbols s4, s8, s12; each defaults to 1.
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
      s12, s13] -> (s4, s8, s12)>
    default_indices:
    - 1
    - 1
    - 1
  # One indexing map per tensor operand, in the order I, K, O.
  # I is read at (d0, d1*stride + d5*dilation, ..., d4); K at (d5, d6, d7);
  # O at (d0, d1, d2, d3, d4).
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
      s8, s9, s10, s11, s12, s13] -> (d0, d1 * s2 + d5 * s4, d2 * s6 + d6 * s8, d3
      * s10 + d7 * s12, d4)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
      s8, s9, s10, s11, s12, s13] -> (d5, d6, d7)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
      s8, s9, s10, s11, s12, s13] -> (d0, d1, d2, d3, d4)>
  # d0..d4 (the dims indexing O) are parallel; d5..d7 (the dims indexing K) are
  # reductions.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  - reduction
  # O = min_signed(O, cast_signed(U, I))
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: min_signed
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: type
            fn_name: cast_signed
            type_var: U
            operands:
            - !ScalarExpression
              scalar_arg: I
--- !LinalgOpConfig
# fill: writes one scalar, cast to the output element type, to every element
# of O.
metadata: !LinalgOpMetadata
  name: fill
  cpp_class_name: FillOp
  doc: |-
    Fills the output tensor with the given value.

    Works for arbitrary ranked output tensors since the operation performs scalar
    accesses only and is thus rank polymorphic. Numeric casting is performed on
    the value operand, promoting it to the same data type as the output.
  implements:
  - LinalgFillOpInterface
  defines:
  - hasCanonicalizer
structured_op: !LinalgStructuredOpConfig
  args:
  # The fill value: a scalar of type T1, so it carries no shape_map.
  - !LinalgOperandDefConfig
    name: value
    kind: scalar
    type_var: T1
  # Output tensor of element type U. The empty shape map `() -> ()` matches the
  # rank-polymorphic behaviour described in the doc string above.
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<() -> ()>
  # One map per operand, in the order value, O; both are rank-0.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<() -> ()>
    - affine_map<() -> ()>
  # No explicit iteration dimensions are declared for this op.
  iterator_types: []
  # O = cast_signed(U, value)
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: type
        fn_name: cast_signed
        type_var: U
        operands:
        - !ScalarExpression
          scalar_arg: value
--- !LinalgOpConfig
# fill_rng_2d: fills a 2-D output with values derived from (seed, d0, d1).
# The assignment tree below is equivalent to:
#   rand1   = (i32(d0) + seed)  * 1103515245 + 12345
#   rand2   = (i32(d1) + rand1) * 1103515245 + 12345
#   offset  = f64(2147483647)
#   scaling = (max - min) * f64(2.3283064e-10)
#   O       = cast_signed(T, (offset + f64(rand2)) * scaling + min)
# NOTE: this file is autogenerated from core_named_ops.py; the doc-string
# wording fix ("generations" -> "generates") must be mirrored there or it will
# be lost on regeneration.
metadata: !LinalgOpMetadata
  name: fill_rng_2d
  cpp_class_name: FillRng2DOp
  doc: |-
    Fills the output tensor with pseudo random numbers.

    The operation generates pseudo random numbers using a linear congruential
    generator. It provides no guarantees regarding the distribution of the
    generated random numbers. Instead of generating the random numbers
    sequentially, it instantiates one random number generator per data element
    and runs them in parallel. The seed operand and the indices of the data
    element seed the random number generation. The min and max operands limit
    the range of the generated random numbers.
structured_op: !LinalgStructuredOpConfig
  args:
  # Lower bound of the generated range (F64 scalar).
  - !LinalgOperandDefConfig
    name: min
    kind: scalar
    type_var: F64
  # Upper bound of the generated range (F64 scalar).
  - !LinalgOperandDefConfig
    name: max
    kind: scalar
    type_var: F64
  # Seed (I32 scalar); combined with the element indices in the assignment.
  - !LinalgOperandDefConfig
    name: seed
    kind: scalar
    type_var: I32
  # 2-D output tensor of element type T with shape (s0, s1).
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: T
    shape_map: affine_map<()[s0, s1] -> (s0, s1)>
  # One map per operand, in the order min, max, seed, O. The three scalars are
  # rank-0; O is indexed by (d0, d1).
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1)[s0, s1] -> ()>
    - affine_map<(d0, d1)[s0, s1] -> ()>
    - affine_map<(d0, d1)[s0, s1] -> ()>
    - affine_map<(d0, d1)[s0, s1] -> (d0, d1)>
  # Both dimensions are parallel; there is no reduction.
  iterator_types:
  - parallel
  - parallel
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: type
        fn_name: cast_signed
        type_var: T
        operands:
        # (offset + f64(rand2)) * scaling + min
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: add
            operands:
            # (offset + f64(rand2)) * scaling
            - !ScalarExpression
              scalar_fn:
                kind: binary
                fn_name: mul
                operands:
                # offset + f64(rand2)
                - !ScalarExpression
                  scalar_fn:
                    kind: binary
                    fn_name: add
                    operands:
                    # offset = f64(2147483647), i.e. 2^31 - 1
                    - !ScalarExpression
                      scalar_fn:
                        kind: type
                        fn_name: cast_signed
                        type_var: F64
                        operands:
                        - !ScalarExpression
                          scalar_const: '2147483647 : i64'
                    # f64(rand2)
                    - !ScalarExpression
                      scalar_fn:
                        kind: type
                        fn_name: cast_signed
                        type_var: F64
                        operands:
                        # rand2 = (i32(d1) + rand1) * 1103515245 + 12345
                        - !ScalarExpression
                          scalar_fn:
                            kind: binary
                            fn_name: add
                            operands:
                            - !ScalarExpression
                              scalar_fn:
                                kind: binary
                                fn_name: mul
                                operands:
                                - !ScalarExpression
                                  scalar_fn:
                                    kind: binary
                                    fn_name: add
                                    operands:
                                    # i32(d1): iteration index 1 cast to I32
                                    - !ScalarExpression
                                      scalar_fn:
                                        kind: type
                                        fn_name: cast_signed
                                        type_var: I32
                                        operands:
                                        - !ScalarExpression
                                          scalar_index: 1
                                    # rand1 = (i32(d0) + seed) * 1103515245 + 12345
                                    - !ScalarExpression
                                      scalar_fn:
                                        kind: binary
                                        fn_name: add
                                        operands:
                                        - !ScalarExpression
                                          scalar_fn:
                                            kind: binary
                                            fn_name: mul
                                            operands:
                                            - !ScalarExpression
                                              scalar_fn:
                                                kind: binary
                                                fn_name: add
                                                operands:
                                                # i32(d0): iteration index 0 cast to I32
                                                - !ScalarExpression
                                                  scalar_fn:
                                                    kind: type
                                                    fn_name: cast_signed
                                                    type_var: I32
                                                    operands:
                                                    - !ScalarExpression
                                                      scalar_index: 0
                                                - !ScalarExpression
                                                  scalar_arg: seed
                                            # multiplier (I32)
                                            - !ScalarExpression
                                              scalar_fn:
                                                kind: type
                                                fn_name: cast_signed
                                                type_var: I32
                                                operands:
                                                - !ScalarExpression
                                                  scalar_const: '1103515245 : i64'
                                        # increment (I32)
                                        - !ScalarExpression
                                          scalar_fn:
                                            kind: type
                                            fn_name: cast_signed
                                            type_var: I32
                                            operands:
                                            - !ScalarExpression
                                              scalar_const: '12345 : i64'
                                # multiplier (I32), same constant as in rand1
                                - !ScalarExpression
                                  scalar_fn:
                                    kind: type
                                    fn_name: cast_signed
                                    type_var: I32
                                    operands:
                                    - !ScalarExpression
                                      scalar_const: '1103515245 : i64'
                            # increment (I32), same constant as in rand1
                            - !ScalarExpression
                              scalar_fn:
                                kind: type
                                fn_name: cast_signed
                                type_var: I32
                                operands:
                                - !ScalarExpression
                                  scalar_const: '12345 : i64'
                # scaling = (max - min) * f64(2.3283064e-10)
                - !ScalarExpression
                  scalar_fn:
                    kind: binary
                    fn_name: mul
                    operands:
                    - !ScalarExpression
                      scalar_fn:
                        kind: binary
                        fn_name: sub
                        operands:
                        - !ScalarExpression
                          scalar_arg: max
                        - !ScalarExpression
                          scalar_arg: min
                    # The constant is numerically close to 2^-32.
                    - !ScalarExpression
                      scalar_fn:
                        kind: type
                        fn_name: cast_signed
                        type_var: F64
                        operands:
                        - !ScalarExpression
                          scalar_const: '2.3283063999999999E-10 : f64'
            # + min: shifts the scaled value to start at the lower bound
            - !ScalarExpression
              scalar_arg: min

